row.names(mtcars)
## [1] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710"
## [4] "Hornet 4 Drive" "Hornet Sportabout" "Valiant"
## [7] "Duster 360" "Merc 240D" "Merc 230"
## [10] "Merc 280" "Merc 280C" "Merc 450SE"
## [13] "Merc 450SL" "Merc 450SLC" "Cadillac Fleetwood"
## [16] "Lincoln Continental" "Chrysler Imperial" "Fiat 128"
## [19] "Honda Civic" "Toyota Corolla" "Toyota Corona"
## [22] "Dodge Challenger" "AMC Javelin" "Camaro Z28"
## [25] "Pontiac Firebird" "Fiat X1-9" "Porsche 914-2"
## [28] "Lotus Europa" "Ford Pantera L" "Ferrari Dino"
## [31] "Maserati Bora" "Volvo 142E"
#' Prediction sum of squares (PRESS) of a fitted linear model.
#'
#' @param linear.model A fitted model accepted by residuals() and
#'   lm.influence() (e.g. the result of lm()).
#' @return A single number: the sum of the squared predictive residuals.
PRESS <- function(linear.model) {
  # Diagonal of the hat matrix, one leverage value per observation
  leverage <- lm.influence(linear.model)$hat
  # Predictive residuals: each ordinary residual divided by (1 - leverage)
  predictive.resid <- residuals(linear.model) / (1 - leverage)
  # PRESS is the sum of their squares
  sum(predictive.resid^2)
}
#' Predictive R-squared of a fitted linear model.
#'
#' @param linear.model A fitted model accepted by anova() and PRESS().
#' @return A single number: 1 - PRESS / (sum of the ANOVA 'Sum Sq' column).
pred_r_squared <- function(linear.model) {
  # The ANOVA table carries one sum of squares per row; adding every row
  # of the 'Sum Sq' column gives the total used as the denominator below.
  ss.table <- anova(linear.model)
  total.ss <- sum(ss.table[["Sum Sq"]])
  # Same form as the ordinary R^2, with PRESS in place of the residual SS
  1 - PRESS(linear.model) / total.ss
}
#' Collect the main goodness-of-fit statistics of a linear model.
#'
#' @param linear.model A fitted model accepted by summary(),
#'   pred_r_squared() and PRESS().
#' @return A one-row data.frame with columns r.squared, adj.r.squared,
#'   pred.r.squared and press.
model_fit_stats <- function(linear.model) {
  # One summary object supplies both the plain and the adjusted R^2
  fit.summary <- summary(linear.model)
  data.frame(
    r.squared = fit.summary$r.squared,
    adj.r.squared = fit.summary$adj.r.squared,
    pred.r.squared = pred_r_squared(linear.model),
    press = PRESS(linear.model)
  )
}
#' Overall F-test p-value of a fitted linear model.
#'
#' @param modelobject A fitted model inheriting from class "lm".
#' @return A bare numeric scalar (all attributes stripped): the upper-tail
#'   probability of the model's F statistic.
#' Stops with an error when `modelobject` does not inherit from "lm", or
#' when its summary carries no F statistic.
lmp <- function (modelobject) {
  # inherits() always yields a single TRUE/FALSE; comparing class() with
  # != returns one value per class and breaks `if` for objects whose
  # class vector has several entries (e.g. c("glm", "lm")).
  if (!inherits(modelobject, "lm")) stop("Not an object of class 'lm' ")
  # f holds, in order: the F value, numerator df, denominator df
  f <- summary(modelobject)[["fstatistic"]]
  if (is.null(f)) {
    stop("The model summary has no F-statistic, so no p-value can be computed")
  }
  p <- pf(f[1], f[2], f[3], lower.tail = FALSE)
  # pf() keeps the name of f[1]; drop it so callers get a plain number
  attributes(p) <- NULL
  return(p)
}
library(datasets)
library(ggplot2)
library(plyr)
data(mtcars)
sum(is.na(mtcars))
## [1] 0
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# Histograms of every variable, in three panels of up to four variables.
# par(mfrow = ...) only affects base graphics and does not arrange ggplot2
# output, so each group is drawn as one faceted plot instead: stack() turns
# the selected columns into long form (columns `values` and `ind`) and
# facet_wrap() lays the variables out two per row. Free scales keep each
# variable on its own axis range, as separate plots would.
ggplot(stack(mtcars[c("mpg", "wt", "cyl", "disp")]), aes(x = values)) +
  geom_histogram(binwidth = 0.5) +
  facet_wrap(~ind, ncol = 2, scales = "free")
ggplot(stack(mtcars[c("hp", "carb", "qsec", "gear")]), aes(x = values)) +
  geom_histogram(binwidth = 0.5) +
  facet_wrap(~ind, ncol = 2, scales = "free")
ggplot(stack(mtcars[c("am", "vs", "drat")]), aes(x = values)) +
  geom_histogram(binwidth = 0.5) +
  facet_wrap(~ind, ncol = 2, scales = "free")
# mpg against each continuous predictor, with a loess smoother.
# ggplot() replaces the deprecated qplot(), and the smoothing method is
# given to geom_smooth() only rather than being passed to every geom.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess")
ggplot(mtcars, aes(x = disp, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess")
ggplot(mtcars, aes(x = hp, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess")
ggplot(mtcars, aes(x = qsec, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess")
ggplot(mtcars, aes(x = drat, y = mpg)) +
  geom_point() +
  geom_smooth(method = "loess")
table(mtcars$am)
##
## 0 1
## 19 13
table(mtcars$cyl)
##
## 4 6 8
## 11 7 14
table(mtcars$vs)
##
## 0 1
## 18 14
table(mtcars$gear)
##
## 3 4 5
## 15 12 5
table(mtcars$carb)
##
## 1 2 3 4 6 8
## 7 10 3 10 1 1
boxplot(mpg ~ factor(am), data = mtcars, xlab="transmission", ylab="mpg")
mean(subset(mtcars, am == 0)$mpg)
## [1] 17.14737
mean(subset(mtcars, am == 1)$mpg)
## [1] 24.39231
lm01 <- lm(mpg ~ factor(am), data = mtcars)
summary(lm01)
##
## Call:
## lm(formula = mpg ~ factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.3923 -3.0923 -0.2974 3.2439 9.5077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.147 1.125 15.247 1.13e-15 ***
## factor(am)1 7.245 1.764 4.106 0.000285 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.902 on 30 degrees of freedom
## Multiple R-squared: 0.3598, Adjusted R-squared: 0.3385
## F-statistic: 16.86 on 1 and 30 DF, p-value: 0.000285
pred_r_squared(lm01)
## [1] 0.2626133
par(mfrow=c(2,2))
plot(lm01)
sort(cor(mtcars)[1,]) # wt,cyl,disp,hp,carb,qsec,gear,am,vs,drat,mpg
## wt cyl disp hp carb qsec
## -0.8676594 -0.8521620 -0.8475514 -0.7761684 -0.5509251 0.4186840
## gear am vs drat mpg
## 0.4802848 0.5998324 0.6640389 0.6811719 1.0000000
cor(mtcars$cyl,mtcars$disp)
## [1] 0.9020329
cor(mtcars$wt,mtcars$disp)
## [1] 0.8879799
cor(mtcars$cyl,mtcars$hp)
## [1] 0.8324475
cor(mtcars$disp,mtcars$hp)
## [1] 0.7909486
cor(mtcars$wt,mtcars$cyl)
## [1] 0.7824958
cor(mtcars$wt,mtcars$hp)
## [1] 0.6587479
lm02 <- lm(mpg ~ wt + factor(am), data = mtcars)
summary(lm02)
##
## Call:
## lm(formula = mpg ~ wt + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5295 -2.3619 -0.1317 1.4025 6.8782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.32155 3.05464 12.218 5.84e-13 ***
## wt -5.35281 0.78824 -6.791 1.87e-07 ***
## factor(am)1 -0.02362 1.54565 -0.015 0.988
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.098 on 29 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7358
## F-statistic: 44.17 on 2 and 29 DF, p-value: 1.579e-09
pred_r_squared(lm02)
## [1] 0.6874984
library(car)
vif(lm02)
## wt factor(am)
## 1.921413 1.921413
par(mfrow=c(2,2))
plot(lm02)
# mpg against wt, coloured by transmission type. am is mapped as a factor
# so the two groups get discrete colours, matching factor(am) in lm02.
# ggplot() replaces the deprecated qplot().
q02 <- ggplot(mtcars, aes(x = wt, y = mpg, colour = factor(am))) +
  geom_point()
# Red line: lm02's intercept and wt slope, i.e. the fitted line for the
# reference level am = 0. coef() is used instead of summary(lm02)$coef,
# which only works through partial matching of `$coefficients`.
q02 <- q02 + geom_abline(
  intercept = coef(lm02)[["(Intercept)"]],
  slope = coef(lm02)[["wt"]],
  colour = "red"
)
q02
lm03 <- lm(mpg ~ cyl + factor(am), data = mtcars)
summary(lm03)
##
## Call:
## lm(formula = mpg ~ cyl + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.6856 -1.7172 -0.2657 1.8838 6.8144
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.5224 2.6032 13.262 7.69e-14 ***
## cyl -2.5010 0.3608 -6.931 1.28e-07 ***
## factor(am)1 2.5670 1.2914 1.988 0.0564 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.059 on 29 degrees of freedom
## Multiple R-squared: 0.759, Adjusted R-squared: 0.7424
## F-statistic: 45.67 on 2 and 29 DF, p-value: 1.094e-09
pred_r_squared(lm03)
## [1] 0.7082706
library(car)
vif(lm03)
## cyl factor(am)
## 1.375739 1.375739
par(mfrow=c(2,2))
plot(lm03)
# mpg against cyl, coloured by transmission type. am is mapped as a factor
# so the two groups get discrete colours, matching factor(am) in lm03.
# ggplot() replaces the deprecated qplot().
q03 <- ggplot(mtcars, aes(x = cyl, y = mpg, colour = factor(am))) +
  geom_point()
# Red line: lm03's intercept and cyl slope, i.e. the fitted line for the
# reference level am = 0. coef() is used instead of summary(lm03)$coef,
# which only works through partial matching of `$coefficients`.
q03 <- q03 + geom_abline(
  intercept = coef(lm03)[["(Intercept)"]],
  slope = coef(lm03)[["cyl"]],
  colour = "red"
)
q03
lm04 <- lm(mpg ~ disp + factor(am), data = mtcars)
summary(lm04)
##
## Call:
## lm(formula = mpg ~ disp + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6382 -2.4751 -0.5631 2.2333 6.8386
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.848081 1.834071 15.184 2.45e-15 ***
## disp -0.036851 0.005782 -6.373 5.75e-07 ***
## factor(am)1 1.833458 1.436100 1.277 0.212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.218 on 29 degrees of freedom
## Multiple R-squared: 0.7333, Adjusted R-squared: 0.7149
## F-statistic: 39.87 on 2 and 29 DF, p-value: 4.749e-09
pred_r_squared(lm04)
## [1] 0.6770371
library(car)
vif(lm04)
## disp factor(am)
## 1.537396 1.537396
par(mfrow=c(2,2))
plot(lm04)
# mpg against disp, coloured by transmission type. am is mapped as a factor
# so the two groups get discrete colours, matching factor(am) in lm04.
# ggplot() replaces the deprecated qplot().
q04 <- ggplot(mtcars, aes(x = disp, y = mpg, colour = factor(am))) +
  geom_point()
# Red line: lm04's intercept and disp slope, i.e. the fitted line for the
# reference level am = 0. coef() is used instead of summary(lm04)$coef,
# which only works through partial matching of `$coefficients`.
q04 <- q04 + geom_abline(
  intercept = coef(lm04)[["(Intercept)"]],
  slope = coef(lm04)[["disp"]],
  colour = "red"
)
q04
lm05 <- lm(mpg ~ wt + cyl + disp + hp + factor(am), data = mtcars)
summary(lm05)
##
## Call:
## lm(formula = mpg ~ wt + cyl + disp + hp + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5952 -1.5864 -0.7157 1.2821 5.5725
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.20280 3.66910 10.412 9.08e-11 ***
## wt -3.30262 1.13364 -2.913 0.00726 **
## cyl -1.10638 0.67636 -1.636 0.11393
## disp 0.01226 0.01171 1.047 0.30472
## hp -0.02796 0.01392 -2.008 0.05510 .
## factor(am)1 1.55649 1.44054 1.080 0.28984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.505 on 26 degrees of freedom
## Multiple R-squared: 0.8551, Adjusted R-squared: 0.8273
## F-statistic: 30.7 on 5 and 26 DF, p-value: 4.029e-10
pred_r_squared(lm05)
## [1] 0.7849818
library(car)
vif(lm05)
## wt cyl disp hp factor(am)
## 6.079452 7.209456 10.401420 4.501859 2.553064
par(mfrow=c(2,2))
plot(lm05)
lm06 <- lm(mpg ~ wt + hp + factor(am), data = mtcars)
summary(lm06)
##
## Call:
## lm(formula = mpg ~ wt + hp + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4221 -1.7924 -0.3788 1.2249 5.5317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.002875 2.642659 12.867 2.82e-13 ***
## wt -2.878575 0.904971 -3.181 0.003574 **
## hp -0.037479 0.009605 -3.902 0.000546 ***
## factor(am)1 2.083710 1.376420 1.514 0.141268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared: 0.8399, Adjusted R-squared: 0.8227
## F-statistic: 48.96 on 3 and 28 DF, p-value: 2.908e-11
pred_r_squared(lm06)
## [1] 0.7878597
library(car)
vif(lm06)
## wt hp factor(am)
## 3.774838 2.088124 2.271082
par(mfrow=c(2,2))
plot(lm06)
lm07 <- lm(mpg ~ wt + cyl + factor(am), data = mtcars)
summary(lm07)
##
## Call:
## lm(formula = mpg ~ wt + cyl + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1735 -1.5340 -0.5386 1.5864 6.0812
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.4179 2.6415 14.923 7.42e-15 ***
## wt -3.1251 0.9109 -3.431 0.00189 **
## cyl -1.5102 0.4223 -3.576 0.00129 **
## factor(am)1 0.1765 1.3045 0.135 0.89334
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.612 on 28 degrees of freedom
## Multiple R-squared: 0.8303, Adjusted R-squared: 0.8122
## F-statistic: 45.68 on 3 and 28 DF, p-value: 6.51e-11
pred_r_squared(lm07)
## [1] 0.7756775
library(car)
vif(lm07)
## wt cyl factor(am)
## 3.609011 2.584066 1.924955
par(mfrow=c(2,2))
plot(lm07)
lm08 <- lm(mpg ~ wt * factor(am), data = mtcars)
summary(lm08)
##
## Call:
## lm(formula = mpg ~ wt * factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6004 -1.5446 -0.5325 0.9012 6.0909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.4161 3.0201 10.402 4.00e-11 ***
## wt -3.7859 0.7856 -4.819 4.55e-05 ***
## factor(am)1 14.8784 4.2640 3.489 0.00162 **
## wt:factor(am)1 -5.2984 1.4447 -3.667 0.00102 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.591 on 28 degrees of freedom
## Multiple R-squared: 0.833, Adjusted R-squared: 0.8151
## F-statistic: 46.57 on 3 and 28 DF, p-value: 5.209e-11
pred_r_squared(lm08)
## [1] 0.7857928
library(car)
vif(lm08)
## wt factor(am) wt:factor(am)
## 2.728248 20.901259 15.366853
par(mfrow=c(2,2))
plot(lm08)
lm09 <- lm(mpg ~ wt * am + qsec, data=mtcars)
summary(lm09)
##
## Call:
## lm(formula = mpg ~ wt * am + qsec, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5076 -1.3801 -0.5588 1.0630 4.3684
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.723 5.899 1.648 0.110893
## wt -2.937 0.666 -4.409 0.000149 ***
## am 14.079 3.435 4.099 0.000341 ***
## qsec 1.017 0.252 4.035 0.000403 ***
## wt:am -4.141 1.197 -3.460 0.001809 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.084 on 27 degrees of freedom
## Multiple R-squared: 0.8959, Adjusted R-squared: 0.8804
## F-statistic: 58.06 on 4 and 27 DF, p-value: 7.168e-13
pred_r_squared(lm09)
## [1] 0.8535095
library(car)
vif(lm09)
## wt am qsec wt:am
## 3.030963 20.970925 1.447406 16.302453
par(mfrow=c(2,2))
plot(lm09)
lm10 <- lm(mpg ~ wt + factor(am) + wt * factor(am), data=mtcars)
summary(lm10)
##
## Call:
## lm(formula = mpg ~ wt + factor(am) + wt * factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6004 -1.5446 -0.5325 0.9012 6.0909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.4161 3.0201 10.402 4.00e-11 ***
## wt -3.7859 0.7856 -4.819 4.55e-05 ***
## factor(am)1 14.8784 4.2640 3.489 0.00162 **
## wt:factor(am)1 -5.2984 1.4447 -3.667 0.00102 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.591 on 28 degrees of freedom
## Multiple R-squared: 0.833, Adjusted R-squared: 0.8151
## F-statistic: 46.57 on 3 and 28 DF, p-value: 5.209e-11
pred_r_squared(lm10)
## [1] 0.7857928
library(car)
vif(lm10)
## wt factor(am) wt:factor(am)
## 2.728248 20.901259 15.366853
par(mfrow=c(2,2))
plot(lm10)
lm11 <- lm(mpg ~ wt + factor(am) + factor(cyl), data=mtcars)
summary(lm11)
##
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(cyl), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4898 -1.3116 -0.5039 1.4162 5.7758
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.7536 2.8135 11.997 2.5e-12 ***
## wt -3.1496 0.9080 -3.469 0.00177 **
## factor(am)1 0.1501 1.3002 0.115 0.90895
## factor(cyl)6 -4.2573 1.4112 -3.017 0.00551 **
## factor(cyl)8 -6.0791 1.6837 -3.611 0.00123 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.603 on 27 degrees of freedom
## Multiple R-squared: 0.8375, Adjusted R-squared: 0.8134
## F-statistic: 34.79 on 4 and 27 DF, p-value: 2.73e-10
pred_r_squared(lm11)
## [1] 0.7765213
library(car)
vif(lm11)
## GVIF Df GVIF^(1/(2*Df))
## wt 3.611208 1 1.900318
## factor(am) 1.925620 1 1.387667
## factor(cyl) 2.585745 2 1.268079
par(mfrow=c(2,2))
plot(lm11)
lm12 <- lm(mpg ~ wt + hp, data=mtcars)
summary(lm12)
##
## Call:
## lm(formula = mpg ~ wt + hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.941 -1.600 -0.182 1.050 5.854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.22727 1.59879 23.285 < 2e-16 ***
## wt -3.87783 0.63273 -6.129 1.12e-06 ***
## hp -0.03177 0.00903 -3.519 0.00145 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.593 on 29 degrees of freedom
## Multiple R-squared: 0.8268, Adjusted R-squared: 0.8148
## F-statistic: 69.21 on 2 and 29 DF, p-value: 9.109e-12
pred_r_squared(lm12)
## [1] 0.7810871
library(car)
vif(lm12)
## wt hp
## 1.766625 1.766625
par(mfrow=c(2,2))
plot(lm12)
lm13 <- lm(mpg ~ wt + carb, data=mtcars)
summary(lm13)
##
## Call:
## lm(formula = mpg ~ wt + carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5206 -2.1223 -0.0467 1.4551 5.9736
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.7300 1.7602 21.435 < 2e-16 ***
## wt -4.7646 0.5765 -8.265 4.12e-09 ***
## carb -0.8215 0.3492 -2.353 0.0256 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.839 on 29 degrees of freedom
## Multiple R-squared: 0.7924, Adjusted R-squared: 0.7781
## F-statistic: 55.36 on 2 and 29 DF, p-value: 1.255e-10
pred_r_squared(lm13)
## [1] 0.7461679
library(car)
vif(lm13)
## wt carb
## 1.223761 1.223761
par(mfrow=c(2,2))
plot(lm13)
lm14 <- lm(mpg ~ wt + disp, data=mtcars)
summary(lm14)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4087 -2.3243 -0.7683 1.7721 6.3484
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.96055 2.16454 16.151 4.91e-16 ***
## wt -3.35082 1.16413 -2.878 0.00743 **
## disp -0.01773 0.00919 -1.929 0.06362 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.917 on 29 degrees of freedom
## Multiple R-squared: 0.7809, Adjusted R-squared: 0.7658
## F-statistic: 51.69 on 2 and 29 DF, p-value: 2.744e-10
pred_r_squared(lm14)
## [1] 0.725321
library(car)
vif(lm14)
## wt disp
## 4.728319 4.728319
par(mfrow=c(2,2))
plot(lm14)
lm15 <- lm(mpg ~ wt + factor(vs), data=mtcars)
summary(lm15)
##
## Call:
## lm(formula = mpg ~ wt + factor(vs), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7071 -2.4415 -0.3129 1.4319 6.0156
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.0042 2.3554 14.012 1.92e-14 ***
## wt -4.4428 0.6134 -7.243 5.63e-08 ***
## factor(vs)1 3.1544 1.1907 2.649 0.0129 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.78 on 29 degrees of freedom
## Multiple R-squared: 0.801, Adjusted R-squared: 0.7873
## F-statistic: 58.36 on 2 and 29 DF, p-value: 6.818e-11
pred_r_squared(lm15)
## [1] 0.7530933
library(car)
vif(lm15)
## wt factor(vs)
## 1.444943 1.444943
par(mfrow=c(2,2))
plot(lm15)
lm16 <- lm(mpg ~ hp + factor(vs), data=mtcars)
summary(lm16)
##
## Call:
## lm(formula = mpg ~ hp + factor(vs), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7131 -2.3336 -0.1332 1.9055 7.9055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.96300 2.89069 9.328 3.13e-10 ***
## hp -0.05453 0.01448 -3.766 0.000752 ***
## factor(vs)1 2.57622 1.96966 1.308 0.201163
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.818 on 29 degrees of freedom
## Multiple R-squared: 0.6246, Adjusted R-squared: 0.5987
## F-statistic: 24.12 on 2 and 29 DF, p-value: 6.768e-07
pred_r_squared(lm16)
## [1] 0.5082001
library(car)
vif(lm16)
## hp factor(vs)
## 2.09586 2.09586
par(mfrow=c(2,2))
plot(lm16)
lm17 <- lm(mpg ~ disp + factor(vs), data=mtcars)
summary(lm17)
##
## Call:
## lm(formula = mpg ~ disp + factor(vs), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4605 -2.0260 -0.6467 1.7285 7.0790
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.949282 2.201166 12.697 2.27e-13 ***
## disp -0.036896 0.006715 -5.494 6.43e-06 ***
## factor(vs)1 1.495004 1.651290 0.905 0.373
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.261 on 29 degrees of freedom
## Multiple R-squared: 0.7261, Adjusted R-squared: 0.7072
## F-statistic: 38.44 on 2 and 29 DF, p-value: 7.005e-09
pred_r_squared(lm17)
## [1] 0.66982
library(car)
vif(lm17)
## disp factor(vs)
## 2.018941 2.018941
par(mfrow=c(2,2))
plot(lm17)
library(MASS)
# Starting model for the stepwise search: mpg on eight numeric predictors.
# Note that am and vs are not among the candidate terms.
fit<- lm(mpg~cyl+disp+hp+drat+wt+qsec+gear+carb,data=mtcars)
# AIC-based stepwise selection; direction="both" lets each step either drop
# a term or add a previously dropped one. The trace of every step is printed.
# NOTE(review): the result is stored under the name `step`, which is also the
# name of the stats::step() function.
step <- stepAIC(fit, direction="both")
## Start: AIC=69.12
## mpg ~ cyl + disp + hp + drat + wt + qsec + gear + carb
##
## Df Sum of Sq RSS AIC
## - carb 1 0.6725 158.80 67.260
## - cyl 1 1.2886 159.41 67.384
## - drat 1 3.2639 161.39 67.778
## - disp 1 3.7211 161.84 67.869
## - qsec 1 5.2229 163.35 68.164
## - gear 1 5.3164 163.44 68.183
## - hp 1 6.9592 165.08 68.503
## <none> 158.12 69.125
## - wt 1 30.4892 188.61 72.767
##
## Step: AIC=67.26
## mpg ~ cyl + disp + hp + drat + wt + qsec + gear
##
## Df Sum of Sq RSS AIC
## - cyl 1 2.055 160.85 65.672
## - drat 1 2.810 161.61 65.822
## - gear 1 4.681 163.48 66.190
## - qsec 1 7.390 166.19 66.716
## <none> 158.80 67.260
## - disp 1 11.211 170.01 67.443
## - hp 1 12.981 171.78 67.775
## + carb 1 0.673 158.12 69.125
## - wt 1 76.291 235.09 77.815
##
## Step: AIC=65.67
## mpg ~ disp + hp + drat + wt + qsec + gear
##
## Df Sum of Sq RSS AIC
## - drat 1 4.936 165.79 64.639
## - gear 1 9.278 170.13 65.466
## - disp 1 9.846 170.70 65.573
## <none> 160.85 65.672
## - qsec 1 17.833 178.69 67.036
## - hp 1 18.419 179.27 67.141
## + cyl 1 2.055 158.80 67.260
## + carb 1 1.439 159.41 67.384
## - wt 1 84.464 245.32 77.178
##
## Step: AIC=64.64
## mpg ~ disp + hp + wt + qsec + gear
##
## Df Sum of Sq RSS AIC
## - disp 1 8.692 174.48 64.274
## <none> 165.79 64.639
## + drat 1 4.936 160.85 65.672
## + cyl 1 4.182 161.61 65.822
## - qsec 1 17.694 183.48 65.884
## - gear 1 19.848 185.63 66.258
## - hp 1 20.149 185.94 66.310
## + carb 1 0.890 164.90 66.467
## - wt 1 91.501 257.29 76.703
##
## Step: AIC=64.27
## mpg ~ hp + wt + qsec + gear
##
## Df Sum of Sq RSS AIC
## - qsec 1 11.057 185.54 64.241
## <none> 174.48 64.274
## - gear 1 11.579 186.06 64.331
## + disp 1 8.692 165.79 64.639
## - hp 1 13.505 187.99 64.660
## + carb 1 6.845 167.63 64.994
## + drat 1 3.783 170.70 65.573
## + cyl 1 1.973 172.51 65.911
## - wt 1 105.091 279.57 77.361
##
## Step: AIC=64.24
## mpg ~ hp + wt + gear
##
## Df Sum of Sq RSS AIC
## - gear 1 9.510 195.05 63.840
## <none> 185.54 64.241
## + qsec 1 11.057 174.48 64.274
## + carb 1 10.850 174.69 64.312
## + cyl 1 9.773 175.76 64.509
## + drat 1 4.176 181.36 65.512
## + disp 1 2.056 183.48 65.884
## - hp 1 91.647 277.19 75.086
## - wt 1 94.554 280.09 75.420
##
## Step: AIC=63.84
## mpg ~ hp + wt
##
## Df Sum of Sq RSS AIC
## + cyl 1 18.427 176.62 62.665
## <none> 195.05 63.840
## + drat 1 11.366 183.68 63.919
## + gear 1 9.510 185.54 64.241
## + qsec 1 8.988 186.06 64.331
## + carb 1 0.300 194.75 65.791
## + disp 1 0.057 194.99 65.831
## - hp 1 83.274 278.32 73.217
## - wt 1 252.627 447.67 88.427
##
## Step: AIC=62.66
## mpg ~ hp + wt + cyl
##
## Df Sum of Sq RSS AIC
## <none> 176.62 62.665
## - hp 1 14.551 191.17 63.198
## + disp 1 6.176 170.44 63.526
## - cyl 1 18.427 195.05 63.840
## + carb 1 2.519 174.10 64.205
## + drat 1 2.245 174.38 64.255
## + qsec 1 1.401 175.22 64.410
## + gear 1 0.856 175.76 64.509
## - wt 1 115.354 291.98 76.750
step$anova
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## mpg ~ cyl + disp + hp + drat + wt + qsec + gear + carb
##
## Final Model:
## mpg ~ hp + wt + cyl
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 23 158.1237 69.12453
## 2 - carb 1 0.6725191 24 158.7962 67.26034
## 3 - cyl 1 2.0553287 25 160.8515 65.67186
## 4 - drat 1 4.9362500 26 165.7878 64.63912
## 5 - disp 1 8.6924752 27 174.4802 64.27442
## 6 - qsec 1 11.0574885 28 185.5377 64.24071
## 7 - gear 1 9.5100407 29 195.0478 63.84027
## 8 + cyl 1 18.4272345 28 176.6205 62.66456
lm18 <- lm(mpg ~ hp + wt + cyl, data=mtcars)
summary(lm18)
##
## Call:
## lm(formula = mpg ~ hp + wt + cyl, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9290 -1.5598 -0.5311 1.1850 5.8986
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.75179 1.78686 21.687 < 2e-16 ***
## hp -0.01804 0.01188 -1.519 0.140015
## wt -3.16697 0.74058 -4.276 0.000199 ***
## cyl -0.94162 0.55092 -1.709 0.098480 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.512 on 28 degrees of freedom
## Multiple R-squared: 0.8431, Adjusted R-squared: 0.8263
## F-statistic: 50.17 on 3 and 28 DF, p-value: 2.184e-11
pred_r_squared(lm18)
## [1] 0.7956775
library(car)
vif(lm18)
## hp wt cyl
## 3.258481 2.580486 4.757456
par(mfrow=c(2,2))
plot(lm18)
summary(lm(mpg~wt+factor(am)+factor(vs),data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(vs), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7733 -2.2519 -0.3445 1.4129 5.6594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.0787 3.7480 8.025 9.71e-09 ***
## wt -3.7845 0.8981 -4.214 0.000236 ***
## factor(am)1 1.4913 1.4863 1.003 0.324262
## factor(vs)1 3.6150 1.2761 2.833 0.008454 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.779 on 28 degrees of freedom
## Multiple R-squared: 0.8079, Adjusted R-squared: 0.7873
## F-statistic: 39.25 on 3 and 28 DF, p-value: 3.659e-10
summary(lm(mpg~wt+factor(am)+factor(cyl),data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(cyl), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4898 -1.3116 -0.5039 1.4162 5.7758
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.7536 2.8135 11.997 2.5e-12 ***
## wt -3.1496 0.9080 -3.469 0.00177 **
## factor(am)1 0.1501 1.3002 0.115 0.90895
## factor(cyl)6 -4.2573 1.4112 -3.017 0.00551 **
## factor(cyl)8 -6.0791 1.6837 -3.611 0.00123 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.603 on 27 degrees of freedom
## Multiple R-squared: 0.8375, Adjusted R-squared: 0.8134
## F-statistic: 34.79 on 4 and 27 DF, p-value: 2.73e-10
summary(lm(mpg~wt+factor(am)+factor(gear),data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(gear), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5798 -2.4056 -0.3692 1.8198 5.7713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.0955 3.1862 11.015 1.72e-11 ***
## wt -4.8782 0.7945 -6.140 1.46e-06 ***
## factor(am)1 0.1883 1.9942 0.094 0.925
## factor(gear)4 2.0769 1.7343 1.198 0.242
## factor(gear)5 -1.0615 2.3845 -0.445 0.660
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.968 on 27 degrees of freedom
## Multiple R-squared: 0.7888, Adjusted R-squared: 0.7575
## F-statistic: 25.2 on 4 and 27 DF, p-value: 8.931e-09
summary(lm(mpg~wt+factor(am)+factor(carb),data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + factor(am) + factor(carb), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.301 -1.906 0.000 1.381 5.179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.688 3.669 8.910 4.45e-09 ***
## wt -3.523 1.058 -3.329 0.0028 **
## factor(am)1 2.498 1.827 1.367 0.1843
## factor(carb)2 -1.201 1.495 -0.804 0.4295
## factor(carb)3 -2.789 2.271 -1.228 0.2312
## factor(carb)4 -3.917 1.875 -2.089 0.0475 *
## factor(carb)6 -5.727 3.354 -1.707 0.1007
## factor(carb)8 -7.609 3.670 -2.073 0.0491 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.987 on 24 degrees of freedom
## Multiple R-squared: 0.8098, Adjusted R-squared: 0.7543
## F-statistic: 14.6 on 7 and 24 DF, p-value: 2.913e-07
summary(lm(mpg~wt+cyl+factor(am),data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + cyl + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1735 -1.5340 -0.5386 1.5864 6.0812
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.4179 2.6415 14.923 7.42e-15 ***
## wt -3.1251 0.9109 -3.431 0.00189 **
## cyl -1.5102 0.4223 -3.576 0.00129 **
## factor(am)1 0.1765 1.3045 0.135 0.89334
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.612 on 28 degrees of freedom
## Multiple R-squared: 0.8303, Adjusted R-squared: 0.8122
## F-statistic: 45.68 on 3 and 28 DF, p-value: 6.51e-11
summary(lm(mpg~wt+hp+factor(am),data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + hp + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4221 -1.7924 -0.3788 1.2249 5.5317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.002875 2.642659 12.867 2.82e-13 ***
## wt -2.878575 0.904971 -3.181 0.003574 **
## hp -0.037479 0.009605 -3.902 0.000546 ***
## factor(am)1 2.083710 1.376420 1.514 0.141268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared: 0.8399, Adjusted R-squared: 0.8227
## F-statistic: 48.96 on 3 and 28 DF, p-value: 2.908e-11
summary(lm(mpg~wt+gear+factor(am),data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + gear + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1663 -2.4342 -0.2539 1.5132 6.6583
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.2114 5.2849 7.420 4.43e-08 ***
## wt -5.3798 0.8017 -6.710 2.77e-07 ***
## gear -0.5570 1.2619 -0.441 0.662
## factor(am)1 0.5938 2.1009 0.283 0.780
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.142 on 28 degrees of freedom
## Multiple R-squared: 0.7545, Adjusted R-squared: 0.7282
## F-statistic: 28.69 on 3 and 28 DF, p-value: 1.097e-08
summary(lm(mpg~wt+carb+factor(am),data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + carb + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5856 -2.1105 0.1393 1.5248 5.1851
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.0163 2.9713 11.448 4.49e-12 ***
## wt -3.6340 0.9281 -3.915 0.000527 ***
## carb -1.1593 0.4063 -2.853 0.008046 **
## factor(am)1 2.5263 1.6479 1.533 0.136490
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.775 on 28 degrees of freedom
## Multiple R-squared: 0.8085, Adjusted R-squared: 0.788
## F-statistic: 39.41 on 3 and 28 DF, p-value: 3.5e-10
summary(lm(mpg~wt,data=mtcars))$fstatistic
## value numdf dendf
## 91.37533 1.00000 30.00000
summary(lm(mpg~cyl,data=mtcars))$fstatistic
## value numdf dendf
## 79.56103 1.00000 30.00000
summary(lm(mpg~disp,data=mtcars))$fstatistic
## value numdf dendf
## 76.51266 1.00000 30.00000
summary(lm(mpg~hp,data=mtcars))$fstatistic
## value numdf dendf
## 45.4598 1.0000 30.0000
summary(lm(mpg~drat,data=mtcars))$fstatistic
## value numdf dendf
## 25.96964 1.00000 30.00000
summary(lm(mpg~vs,data=mtcars))$fstatistic
## value numdf dendf
## 23.66224 1.00000 30.00000
summary(lm(mpg~am,data=mtcars))$fstatistic
## value numdf dendf
## 16.86028 1.00000 30.00000
summary(lm(mpg~carb,data=mtcars))$fstatistic
## value numdf dendf
## 13.07365 1.00000 30.00000
summary(lm(mpg~gear,data=mtcars))$fstatistic
## value numdf dendf
## 8.995144 1.000000 30.000000
summary(lm(mpg~qsec,data=mtcars))$fstatistic
## value numdf dendf
## 6.376702 1.000000 30.000000
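For these single-predictor models the F statistic falls in the same order as the absolute correlation of each predictor with mpg (in a simple regression F = r^2 (n - 2) / (1 - r^2), so a weaker correlation always gives a smaller F).
wt, cyl, disp and hp are all strongly correlated with one another.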
# Chain of nested models: start from mpg ~ wt and add predictors in batches
# (each formula contains every term of the one before it).
nm01 <- lm(mpg ~ wt, data=mtcars)
nm03 <- update(nm01, mpg ~ wt+cyl+disp)
nm05 <- update(nm01, mpg ~ wt+cyl+disp+hp+drat)
nm07 <- update(nm01, mpg ~ wt+cyl+disp+hp+drat+factor(vs)+factor(am))
nm09 <- update(nm01, mpg ~ wt+cyl+disp+hp+drat+factor(vs)+factor(am)+carb+gear)
nm10 <- update(nm01, mpg ~ wt+cyl+disp+hp+drat+factor(vs)+factor(am)+carb+gear+qsec)
# Sequential F-tests: each row of the table compares a model with the
# previous, smaller one.
anova(nm01,nm03,nm05,nm07,nm09,nm10)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + cyl + disp
## Model 3: mpg ~ wt + cyl + disp + hp + drat
## Model 4: mpg ~ wt + cyl + disp + hp + drat + factor(vs) + factor(am)
## Model 5: mpg ~ wt + cyl + disp + hp + drat + factor(vs) + factor(am) +
## carb + gear
## Model 6: mpg ~ wt + cyl + disp + hp + drat + factor(vs) + factor(am) +
## carb + gear + qsec
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 30 278.32
## 2 28 188.49 2 89.830 6.3949 0.006777 **
## 3 26 167.43 2 21.066 1.4997 0.246152
## 4 24 158.65 2 8.772 0.6245 0.545206
## 5 22 156.36 2 2.296 0.1634 0.850292
## 6 21 147.49 1 8.864 1.2621 0.273941
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Second chain of nested models, this time without cyl:
# wt only -> + disp, hp -> + drat, qsec.
nnm01 <- lm(mpg ~ wt, data=mtcars)
nnm03 <- update(nnm01, mpg ~ wt+disp+hp)
nnm05 <- update(nnm01, mpg ~ wt+disp+hp+drat+qsec)
# Sequential F-tests: each row compares a model with the previous one.
anova(nnm01,nnm03,nnm05)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + disp + hp
## Model 3: mpg ~ wt + disp + hp + drat + qsec
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 30 278.32
## 2 28 194.99 2 83.331 6.3676 0.005614 **
## 3 26 170.13 2 24.862 1.8997 0.169802
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Third model sequence: wt alone, then wt + cyl + disp, then
# wt + cyl + factor(am) + hp + drat.
nnnm01 <- lm(mpg ~ wt, data = mtcars)
# Build the larger models from this section's own base fit (nnnm01) so the
# section does not depend on an object created by an earlier section.
nnnm03 <- update(nnnm01, mpg ~ wt + cyl + disp)
nnnm05 <- update(nnnm01, mpg ~ wt + cyl + factor(am) + hp + drat)
# NOTE(review): the third model drops disp, so it does not contain the second
# model; the last row of this table is not a nested-model F test and should be
# read with care.
anova(nnnm01, nnnm03, nnnm05)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt
## Model 2: mpg ~ wt + cyl + disp
## Model 3: mpg ~ wt + cyl + factor(am) + hp + drat
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 30 278.32
## 2 28 188.49 2 89.830 6.8848 0.003986 **
## 3 26 169.62 2 18.875 1.4466 0.253694
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Candidate model: mpg explained by weight, displacement and horsepower.
lm19 <- lm(formula = mpg ~ wt + disp + hp, data = mtcars)
# Coefficient table, residual summary and overall fit statistics.
summary(lm19)
##
## Call:
## lm(formula = mpg ~ wt + disp + hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.891 -1.640 -0.172 1.061 5.861
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.105505 2.110815 17.579 < 2e-16 ***
## wt -3.800891 1.066191 -3.565 0.00133 **
## disp -0.000937 0.010350 -0.091 0.92851
## hp -0.031157 0.011436 -2.724 0.01097 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.639 on 28 degrees of freedom
## Multiple R-squared: 0.8268, Adjusted R-squared: 0.8083
## F-statistic: 44.57 on 3 and 28 DF, p-value: 8.65e-11
# Predictive (PRESS-based) R^2 of lm19.
pred_r_squared(linear.model = lm19)
## [1] 0.7678953
# Variance inflation factor of each predictor (vif() comes from car).
library(car)
vif(lm19)
## wt disp hp
## 4.844618 7.324517 2.736633
# Draw the lm diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm19)
# Same predictors as lm19 plus transmission type (am) as a factor.
lm20 <- lm(formula = mpg ~ wt + disp + hp + factor(am), data = mtcars)
# Coefficient table, residual summary and overall fit statistics.
summary(lm20)
##
## Call:
## lm(formula = mpg ~ wt + disp + hp + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4590 -1.6900 -0.3708 1.1301 5.5011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.209443 2.822826 12.119 1.98e-12 ***
## wt -3.046747 1.157119 -2.633 0.01383 *
## disp 0.002489 0.010377 0.240 0.81222
## hp -0.039323 0.012434 -3.163 0.00384 **
## factor(am)1 2.159271 1.435176 1.505 0.14405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.581 on 27 degrees of freedom
## Multiple R-squared: 0.8402, Adjusted R-squared: 0.8166
## F-statistic: 35.5 on 4 and 27 DF, p-value: 2.181e-10
# Predictive (PRESS-based) R^2 of lm20.
pred_r_squared(linear.model = lm20)
## [1] 0.7753741
# Variance inflation factor of each predictor (vif() comes from car).
library(car)
vif(lm20)
## wt disp hp factor(am)
## 5.963704 7.695157 3.381008 2.386005
# Draw the lm diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm20)
# All DFBETAS of lm20, rounded to 3 decimals and sorted ascending.
# NOTE(review): sort() flattens the matrix into one unnamed vector, so the
# printed values cannot be traced back to a car or a coefficient.
sort(round(dfbetas(lm20), digits = 3))
## [1] -0.903 -0.456 -0.375 -0.346 -0.331 -0.303 -0.299 -0.298 -0.281 -0.264
## [11] -0.261 -0.260 -0.257 -0.255 -0.255 -0.253 -0.218 -0.215 -0.206 -0.206
## [21] -0.168 -0.168 -0.164 -0.162 -0.147 -0.144 -0.133 -0.126 -0.124 -0.114
## [31] -0.113 -0.111 -0.110 -0.101 -0.095 -0.092 -0.091 -0.090 -0.089 -0.089
## [41] -0.087 -0.083 -0.083 -0.075 -0.066 -0.062 -0.061 -0.060 -0.059 -0.057
## [51] -0.054 -0.053 -0.051 -0.044 -0.041 -0.041 -0.039 -0.038 -0.037 -0.036
## [61] -0.034 -0.034 -0.034 -0.033 -0.032 -0.031 -0.029 -0.023 -0.021 -0.018
## [71] -0.017 -0.016 -0.016 -0.014 -0.011 -0.010 -0.008 -0.007 -0.005 -0.003
## [81] -0.001 0.001 0.001 0.004 0.004 0.004 0.006 0.009 0.011 0.012
## [91] 0.012 0.013 0.014 0.017 0.023 0.023 0.026 0.028 0.030 0.035
## [101] 0.039 0.040 0.041 0.045 0.046 0.048 0.051 0.053 0.055 0.059
## [111] 0.060 0.060 0.061 0.067 0.068 0.074 0.075 0.079 0.080 0.095
## [121] 0.097 0.097 0.098 0.106 0.107 0.109 0.109 0.113 0.118 0.120
## [131] 0.121 0.121 0.128 0.129 0.135 0.138 0.139 0.146 0.153 0.154
## [141] 0.161 0.181 0.186 0.192 0.194 0.194 0.197 0.197 0.199 0.220
## [151] 0.241 0.266 0.291 0.302 0.375 0.404 0.472 0.502 0.819 0.873
# Leverage (hat value) of each car in lm20, rounded to 3 decimals and sorted
# ascending; the car names are kept.
sort(round(hatvalues(lm20), digits = 3))
## Merc 450SL Merc 450SLC Valiant
## 0.071 0.072 0.076
## Datsun 710 AMC Javelin Porsche 914-2
## 0.089 0.093 0.094
## Mazda RX4 Merc 450SE Dodge Challenger
## 0.097 0.098 0.101
## Hornet 4 Drive Fiat X1-9 Toyota Corolla
## 0.104 0.107 0.109
## Fiat 128 Mazda RX4 Wag Volvo 142E
## 0.112 0.123 0.130
## Merc 240D Merc 230 Camaro Z28
## 0.142 0.143 0.146
## Merc 280 Merc 280C Ferrari Dino
## 0.149 0.149 0.149
## Honda Civic Lotus Europa Hornet Sportabout
## 0.153 0.167 0.184
## Toyota Corona Pontiac Firebird Duster 360
## 0.190 0.196 0.199
## Chrysler Imperial Ford Pantera L Cadillac Fleetwood
## 0.231 0.271 0.273
## Lincoln Continental Maserati Bora
## 0.277 0.503
# Scatterplot matrix of every pair of mtcars columns.
pairs(mtcars)
# Full model: mpg on all ten other variables, with vs and am as factors.
full <- lm(
  formula = mpg ~ cyl + disp + hp + drat + wt + qsec + factor(vs) +
    factor(am) + gear + carb,
  data = mtcars
)
summary(full)
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + factor(vs) +
## factor(am) + gear + carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4506 -1.6044 -0.1196 1.2193 4.6271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.30337 18.71788 0.657 0.5181
## cyl -0.11144 1.04502 -0.107 0.9161
## disp 0.01334 0.01786 0.747 0.4635
## hp -0.02148 0.02177 -0.987 0.3350
## drat 0.78711 1.63537 0.481 0.6353
## wt -3.71530 1.89441 -1.961 0.0633 .
## qsec 0.82104 0.73084 1.123 0.2739
## factor(vs)1 0.31776 2.10451 0.151 0.8814
## factor(am)1 2.52023 2.05665 1.225 0.2340
## gear 0.65541 1.49326 0.439 0.6652
## carb -0.19942 0.82875 -0.241 0.8122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared: 0.869, Adjusted R-squared: 0.8066
## F-statistic: 13.93 on 10 and 21 DF, p-value: 3.793e-07
# Backward elimination, step 1: refit without cyl, the term with the largest
# p-value (0.9161) in the full model.
summary(lm(
  formula = mpg ~ disp + hp + drat + wt + qsec + factor(vs) + factor(am) +
    gear + carb,
  data = mtcars
))
##
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + factor(vs) +
## factor(am) + gear + carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4286 -1.5908 -0.0412 1.2120 4.5961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.96007 13.53030 0.810 0.4266
## disp 0.01283 0.01682 0.763 0.4538
## hp -0.02191 0.02091 -1.048 0.3062
## drat 0.83520 1.53625 0.544 0.5921
## wt -3.69251 1.83954 -2.007 0.0572 .
## qsec 0.84244 0.68678 1.227 0.2329
## factor(vs)1 0.38975 1.94800 0.200 0.8433
## factor(am)1 2.57743 1.94035 1.328 0.1977
## gear 0.71155 1.36562 0.521 0.6075
## carb -0.21958 0.78856 -0.278 0.7833
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.59 on 22 degrees of freedom
## Multiple R-squared: 0.8689, Adjusted R-squared: 0.8153
## F-statistic: 16.21 on 9 and 22 DF, p-value: 9.031e-08
# Backward elimination, step 2: refit without factor(vs), the term with the
# largest p-value (0.8433) in the previous fit.
summary(lm(
  formula = mpg ~ disp + hp + drat + wt + qsec + factor(am) + gear + carb,
  data = mtcars
))
##
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + factor(am) +
## gear + carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.356 -1.576 -0.149 1.218 4.604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.76828 11.89230 0.821 0.4199
## disp 0.01214 0.01612 0.753 0.4590
## hp -0.02095 0.01993 -1.051 0.3040
## drat 0.87510 1.49113 0.587 0.5630
## wt -3.71151 1.79834 -2.064 0.0505 .
## qsec 0.91083 0.58312 1.562 0.1319
## factor(am)1 2.52390 1.88128 1.342 0.1928
## gear 0.75984 1.31577 0.577 0.5692
## carb -0.24796 0.75933 -0.327 0.7470
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.535 on 23 degrees of freedom
## Multiple R-squared: 0.8687, Adjusted R-squared: 0.823
## F-statistic: 19.02 on 8 and 23 DF, p-value: 2.008e-08
# Backward elimination, step 3: refit without carb, the term with the largest
# p-value (0.7470) in the previous fit.
summary(lm(
  formula = mpg ~ disp + hp + drat + wt + qsec + factor(am) + gear,
  data = mtcars
))
##
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + factor(am) +
## gear, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1200 -1.7753 -0.1446 1.0903 4.7172
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.19763 11.54220 0.797 0.43334
## disp 0.01552 0.01214 1.278 0.21342
## hp -0.02471 0.01596 -1.548 0.13476
## drat 0.81023 1.45007 0.559 0.58151
## wt -4.13065 1.23593 -3.342 0.00272 **
## qsec 1.00979 0.48883 2.066 0.04981 *
## factor(am)1 2.58980 1.83528 1.411 0.17104
## gear 0.60644 1.20596 0.503 0.61964
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.488 on 24 degrees of freedom
## Multiple R-squared: 0.8681, Adjusted R-squared: 0.8296
## F-statistic: 22.56 on 7 and 24 DF, p-value: 4.218e-09
# Backward elimination, step 4: refit without gear, the term with the largest
# p-value (0.61964) in the previous fit.
summary(lm(
  formula = mpg ~ disp + hp + drat + wt + qsec + factor(am),
  data = mtcars
))
##
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + factor(am),
## data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2669 -1.6148 -0.2585 1.1220 4.5564
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.71062 10.97539 0.976 0.33848
## disp 0.01310 0.01098 1.193 0.24405
## hp -0.02180 0.01465 -1.488 0.14938
## drat 1.02065 1.36748 0.746 0.46240
## wt -4.04454 1.20558 -3.355 0.00254 **
## qsec 0.99073 0.48002 2.064 0.04955 *
## factor(am)1 2.98469 1.63382 1.827 0.07969 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.45 on 25 degrees of freedom
## Multiple R-squared: 0.8667, Adjusted R-squared: 0.8347
## F-statistic: 27.09 on 6 and 25 DF, p-value: 8.637e-10
# Backward elimination, step 5: refit without drat, the term with the largest
# p-value (0.46240) in the previous fit.
summary(lm(
  formula = mpg ~ disp + hp + wt + qsec + factor(am),
  data = mtcars
))
##
## Call:
## lm(formula = mpg ~ disp + hp + wt + qsec + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5399 -1.7398 -0.3196 1.1676 4.5534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.36190 9.74079 1.474 0.15238
## disp 0.01124 0.01060 1.060 0.29897
## hp -0.02117 0.01450 -1.460 0.15639
## wt -4.08433 1.19410 -3.420 0.00208 **
## qsec 1.00690 0.47543 2.118 0.04391 *
## factor(am)1 3.47045 1.48578 2.336 0.02749 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.429 on 26 degrees of freedom
## Multiple R-squared: 0.8637, Adjusted R-squared: 0.8375
## F-statistic: 32.96 on 5 and 26 DF, p-value: 1.844e-10
# Backward elimination, step 6: refit without disp, the term with the largest
# p-value (0.29897) in the previous fit.
summary(lm(
  formula = mpg ~ hp + wt + qsec + factor(am),
  data = mtcars
))
##
## Call:
## lm(formula = mpg ~ hp + wt + qsec + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4975 -1.5902 -0.1122 1.1795 4.5404
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.44019 9.31887 1.871 0.07215 .
## hp -0.01765 0.01415 -1.247 0.22309
## wt -3.23810 0.88990 -3.639 0.00114 **
## qsec 0.81060 0.43887 1.847 0.07573 .
## factor(am)1 2.92550 1.39715 2.094 0.04579 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.435 on 27 degrees of freedom
## Multiple R-squared: 0.8579, Adjusted R-squared: 0.8368
## F-statistic: 40.74 on 4 and 27 DF, p-value: 4.589e-11
# Backward elimination, step 7: refit without hp, the term with the largest
# p-value (0.22309) in the previous fit. Every remaining predictor now has
# p < 0.05.
summary(lm(
  formula = mpg ~ wt + qsec + factor(am),
  data = mtcars
))
##
## Call:
## lm(formula = mpg ~ wt + qsec + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4811 -1.5555 -0.7257 1.4110 4.6610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.6178 6.9596 1.382 0.177915
## wt -3.9165 0.7112 -5.507 6.95e-06 ***
## qsec 1.2259 0.2887 4.247 0.000216 ***
## factor(am)1 2.9358 1.4109 2.081 0.046716 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared: 0.8497, Adjusted R-squared: 0.8336
## F-statistic: 52.75 on 3 and 28 DF, p-value: 1.21e-11
# Keep the model reached by backward elimination as lm21, then show its
# diagnostic plots and its summary.
lm21 <- lm(formula = mpg ~ wt + qsec + factor(am), data = mtcars)
plot(lm21)
summary(lm21)
##
## Call:
## lm(formula = mpg ~ wt + qsec + factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4811 -1.5555 -0.7257 1.4110 4.6610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.6178 6.9596 1.382 0.177915
## wt -3.9165 0.7112 -5.507 6.95e-06 ***
## qsec 1.2259 0.2887 4.247 0.000216 ***
## factor(am)1 2.9358 1.4109 2.081 0.046716 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared: 0.8497, Adjusted R-squared: 0.8336
## F-statistic: 52.75 on 3 and 28 DF, p-value: 1.21e-11
# Predictive (PRESS-based) R^2 of lm21.
pred_r_squared(linear.model = lm21)
## [1] 0.7945881
# Variance inflation factor of each predictor (vif() comes from car).
library(car)
vif(lm21)
## wt qsec factor(am)
## 2.482952 1.364339 2.541437
# Draw the lm diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm21)
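The intercept is the predicted response when all predictors are 0, but a car with wt = 0 and qsec = 0 is not meaningful. We therefore center wt and qsec at their means, so that the intercept becomes the predicted mpg of a car with am = 0 and average wt and qsec; the slopes and the fit are unchanged.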
# Same model as lm21 with wt and qsec centered at their sample means, which
# moves the intercept to a car with average wt and qsec and am = 0.
lm22 <- lm(
  formula = mpg ~ I(wt - mean(wt)) + I(qsec - mean(qsec)) + factor(am),
  data = mtcars
)
summary(lm22)
##
## Call:
## lm(formula = mpg ~ I(wt - mean(wt)) + I(qsec - mean(qsec)) +
## factor(am), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4811 -1.5555 -0.7257 1.4110 4.6610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.8979 0.7194 26.271 < 2e-16 ***
## I(wt - mean(wt)) -3.9165 0.7112 -5.507 6.95e-06 ***
## I(qsec - mean(qsec)) 1.2259 0.2887 4.247 0.000216 ***
## factor(am)1 2.9358 1.4109 2.081 0.046716 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared: 0.8497, Adjusted R-squared: 0.8336
## F-statistic: 52.75 on 3 and 28 DF, p-value: 1.21e-11
# Predictive (PRESS-based) R^2 of lm22.
pred_r_squared(linear.model = lm22)
## [1] 0.7945881
# Variance inflation factor of each predictor (vif() comes from car).
library(car)
vif(lm22)
## I(wt - mean(wt)) I(qsec - mean(qsec)) factor(am)
## 2.482952 1.364339 2.541437
# Draw the lm diagnostic plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm22)